import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import f_classif

# Locations of the merged input table and of the imputed output table.
INPUT_CSV = r"C:\Users\Lenovo\Desktop\机器学习数据\JUTTA DATA\合并数据.csv"
OUTPUT_CSV = r"C:\Users\Lenovo\Desktop\机器学习数据\JUTTA DATA\DATA_without_NAN.csv"

# Load the merged table once.
data_old = pd.read_csv(INPUT_CSV)

# Keep the TOIMTUKI_DATA column as its own single-column DataFrame.
# It is taken from the table that is already in memory instead of parsing the
# same CSV a second time; selecting with a list of labels returns a new
# DataFrame, so it stays independent of `data_old`.
TOIMTUKI_DATA = data_old[["TOIMTUKI_DATA"]]

# Names of all columns (features) of the merged table.
columns_name = data_old.columns.values

# Plan: handle missing values first, then keep only the rows whose
# TOIMTUKI_DATA is non-zero (the row filter is not performed in this section).

# Fill missing values with the median of each column.
imp_median = SimpleImputer(strategy="median")
DATA_without_NAN = imp_median.fit_transform(data_old)

# Export the imputed data.
# NOTE(review): the imputer returns a plain array, so the DataFrame built here
# has default integer column labels rather than the names in `columns_name`,
# and `to_csv` also writes the row index as a first column. That layout is kept
# as-is because later steps may rely on it -- confirm whether the feature names
# should be restored (columns that are entirely NaN may be dropped by the
# imputer, so the names would have to be filtered accordingly).
DATA_without_NAN = pd.DataFrame(DATA_without_NAN)
DATA_without_NAN.to_csv(OUTPUT_CSV)